Evaluation of RISK survival models

This document highlights the use of

for the evaluation (RRPlot), and calibration of cox models (CoxRiskCalibration) or logistic models (CalibrationProbPoissonRisk) of survival data.

Furthermore, it can be used to evaluate any Risk index that reruns the probability of a future event on external data-set.

This document will use the survival::rotterdam, and survival::gbsg data-sets to train and predict the risk of cancer recurrence after surgery. Both Cox and Logistic models will be trained and evaluated.

Here are some sample plots returned by the evaluated functions:

The libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

Breast Cancer Royston-Altman data

data(gbsg, package=“survival”) and data(rotterdam, package=“survival”)

gbsgdata <- gbsg
rownames(gbsgdata) <- gbsgdata$pid
gbsgdata$pid <- NULL

odata <-rotterdam
rownames(odata) <- odata$pid
odata$pid <- NULL
odata$rfstime <- odata$rtime
odata$status <- odata$recur
odata$rtime <- NULL
odata$recur <- NULL

odata <- odata[,colnames(odata) %in% colnames(gbsgdata)]

odata$size <- 10*(odata$size=="<=20") + 
  35*(odata$size=="20-50") + 
  60*(odata$size==">50")

data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,odata))

data$`(Intercept)` <- NULL

dataBrestCancerTrain <- cbind(time=odata[rownames(data),"rfstime"],status=odata[rownames(data),"status"],data)

colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),":","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain)," ","")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"\\.","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"-","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),">","_")
dataBrestCancerTrain$time <- dataBrestCancerTrain$time/365 ## To years


pander::pander(table(odata[rownames(data),"status"]),caption="rotterdam")
rotterdam
0 1
1464 1518

data(gbsg, package=“survival”) data conditioning

gbsgdata <- gbsgdata[,colnames(odata)]
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,gbsgdata))

data$`(Intercept)` <- NULL

dataBrestCancerTest <- cbind(time=gbsgdata[rownames(data),"rfstime"],status=gbsgdata[rownames(data),"status"],data)

colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),":","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest)," ","")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"\\.","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"-","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),">","_")
dataBrestCancerTest$time <- dataBrestCancerTest$time/365

pander::pander(table(odata[rownames(data),"status"]), caption="gbsg")
gbsg
0 1
499 183

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataBrestCancerTrain,loops=1,NumberofRepeats = 5)

—–.

sm <- summary(ml)
pander::pander(sm$coefficients)
  Estimate lower HR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 0.000716 1.001 1.001 1.001 0.626 0.600 0.632 0.630 0.601 0.634 0.03040 0.4594 12.81 14.37 0.033056 1
size_grade 0.005649 1.005 1.006 1.006 0.598 0.623 0.632 0.599 0.626 0.634 0.01868 0.3914 9.82 11.29 0.007947 1
nodes 0.086582 1.082 1.090 1.099 0.637 0.642 0.643 0.640 0.643 0.644 0.00745 0.0564 8.33 1.66 0.000148 1
size 0.006888 1.005 1.007 1.009 0.595 0.641 0.643 0.595 0.642 0.644 0.01447 0.3587 8.05 9.97 0.001322 1
size_nodes -0.000378 1.000 1.000 1.000 0.624 0.643 0.643 0.629 0.644 0.644 0.00346 0.3430 7.25 9.57 -0.000377 1
age_size -0.000149 1.000 1.000 1.000 0.567 0.627 0.632 0.568 0.630 0.634 0.00635 0.1935 5.95 5.36 0.004078 1
grade 0.204934 1.146 1.227 1.314 0.565 0.637 0.643 0.561 0.638 0.644 0.00926 0.2069 5.88 6.31 0.005344 1
age -0.003113 0.996 0.997 0.998 0.513 0.628 0.643 0.513 0.628 0.644 0.00416 0.0917 5.27 2.51 0.015465 1
grade_nodes -0.013784 0.981 0.986 0.992 0.635 0.645 0.643 0.639 0.646 0.644 0.00207 -0.0910 5.03 -2.55 -0.002609 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

timeinterval <- 5 # Five years

h0 <- sum(dataBrestCancerTrain$status & dataBrestCancerTrain$time <= timeinterval)
h0 <- h0/sum((dataBrestCancerTrain$time > timeinterval) | (dataBrestCancerTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.429 5
index <- predict(ml,dataBrestCancerTrain)
rdata <- cbind(dataBrestCancerTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event

toinclude <- rdata[,1]==1 
obstiemToEvent <- dataBrestCancerTrain[,"time"]
tmin<-min(obstiemToEvent)
sum(toinclude)

[1] 1518

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.552 0.0112 49.4 3.39e-318
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1518 2.67 0.617 0.616
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)

3.12

The Time vs. Events are not calibrated. Lets do the calibration

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.459 0.389 0.320 0.214 0.18549 0.4996
RR 1.690 1.713 1.799 2.376 1.00000 1.7255
RR_LCI 1.586 1.603 1.666 1.869 0.00000 1.6196
RR_UCI 1.802 1.830 1.942 3.019 0.00000 1.8383
SEN 0.299 0.462 0.644 0.965 1.00000 0.2464
SPE 0.900 0.798 0.646 0.125 0.00137 0.9310
BACC 0.599 0.630 0.645 0.545 0.50068 0.5887
NetBenefit 0.110 0.172 0.246 0.374 0.39742 0.0916
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.13 1.07 1.19 4.66e-06
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.16 1.16 1.15 1.17
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.35 1.35 1.35 1.35
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.676 0.676 0.663 0.691
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.675 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.299 0.276 0.323
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.459 0.389
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 465.079317 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1985 816 1144 93.9 385.7
class=1 396 248 177 28.0 31.8
class=2 601 454 197 336.3 391.3

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBrestCancerTrain,"status","time",timeInterval=timeinterval)


pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.696 1.62 6.95

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event after calibration

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.644 0.013 49.4 3.39e-318
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1518 2.67 0.617 0.616
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)

3.12 and 2.63

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.631 0.550 0.465 0.323 0.28321 0.500
RR 1.690 1.713 1.799 2.376 1.00000 1.751
RR_LCI 1.586 1.603 1.666 1.869 0.00000 1.630
RR_UCI 1.802 1.830 1.942 3.019 0.00000 1.881
SEN 0.299 0.462 0.644 0.965 1.00000 0.578
SPE 0.900 0.798 0.646 0.125 0.00137 0.706
BACC 0.599 0.630 0.645 0.545 0.50068 0.642
NetBenefit 0.068 0.114 0.177 0.286 0.31537 0.150
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.998 0.949 1.05 0.959
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1 1 0.995 1.01
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.01 1.01 1.01 1.01
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.676 0.676 0.662 0.691
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.675 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.299 0.276 0.323
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.631 0.55
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 465.079317 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1985 816 1144 93.9 385.7
class=1 396 248 177 28.0 31.8
class=2 601 454 197 336.3 391.3

Performance on the external data set

index <- predict(ml,dataBrestCancerTest)
pp <- predictionStats_binary(cbind(dataBrestCancerTest$status,index),plotname="Breast Cancer")

par(op)


prob <- ppoisGzero(index,h0)
rdata <- cbind(dataBrestCancerTest$status,prob)
rrCoxTestAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

External Data Report

pander::pander(t(rrCoxTestAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.631 @:0.55 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6303 0.5507 0.5825 0.336 2.70e-01 0.500
RR 1.7994 1.6427 1.7581 3.279 2.64e+01 1.603
RR_LCI 1.5366 1.3951 1.4980 1.641 5.65e-02 1.352
RR_UCI 2.1071 1.9343 2.0632 6.552 1.23e+04 1.902
SEN 0.3311 0.4515 0.4181 0.977 1.00e+00 0.552
SPE 0.8734 0.7571 0.8088 0.111 1.55e-02 0.656
BACC 0.6022 0.6043 0.6134 0.544 5.08e-01 0.604
NetBenefit 0.0226 0.0289 0.0318 0.172 2.30e-01 0.047
pander::pander(t(rrCoxTestAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.33 1.19 1.49 1.73e-06
pander::pander(rrCoxTestAnalysis$c.index,caption="C. Index")
  • C Index: 0.664

  • Dxy: 0.328

  • S.D.: 0.0311

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 176738

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.664 0.665 0.631 0.695
pander::pander(t(rrCoxTestAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.66 0.619 0.7
pander::pander((rrCoxTestAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.328 0.275 0.384
pander::pander((rrCoxTestAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.873 0.836 0.905
pander::pander(t(rrCoxTestAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.631 0.55
pander::pander(rrCoxTestAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 81.471750 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 457 164 221.4 14.888 58.181
class=1 82 37 33.2 0.438 0.494
class=2 147 98 44.4 64.710 77.254

Calibrating the index on the test data

calprob <- CoxRiskCalibration(ml,dataBrestCancerTest,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.535 0.925 4.87
rdata <- cbind(dataBrestCancerTest$status,calprob$prob)

rrAnalysis <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

After Calibration Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.5843 0.483 0.489 0.270 2.15e-01 0.4991
RR 1.7405 1.732 1.758 3.279 2.64e+01 1.7385
RR_LCI 1.4790 1.475 1.498 1.641 5.65e-02 1.4816
RR_UCI 2.0483 2.035 2.063 6.552 1.23e+04 2.0399
SEN 0.2676 0.425 0.418 0.977 1.00e+00 0.3980
SPE 0.8992 0.798 0.809 0.111 1.55e-02 0.8191
BACC 0.5834 0.612 0.613 0.544 5.08e-01 0.6086
NetBenefit 0.0367 0.079 0.079 0.240 2.84e-01 0.0718
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.23 1.09 1.37 0.000607
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.664

  • Dxy: 0.328

  • S.D.: 0.0311

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 176737

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.664 0.665 0.632 0.695
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.66 0.619 0.7
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.264 0.215 0.318
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.865 0.927
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.585 0.483
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 80.835092 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 482 173 232.4 15.20 69.5
class=1 86 47 32.0 7.02 7.9
class=2 118 79 34.6 57.14 65.4

Logistic Model

Here we train a logistic model on the same data set

## Only label subjects that present event withing five years

dataBrestCancerR <- subset(dataBrestCancerTrain, time>=5 | status==1)
dataBrestCancerR$status <- dataBrestCancerR$status * (dataBrestCancerR$time < 5)
dataBrestCancerR$time <- NULL

#ml <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=20,NumberofRepeats = 5)
mlog <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=1,NumberofRepeats = 5)

—–..

sm <- summary(mlog)
pander::pander(sm$coefficients)
  Estimate lower OR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
size_nodes 1.05e-03 1.001 1.001 1.001 0.669 0.571 0.668 0.627 0.500 0.628 0.11233 0.63654 17.86 18.870 0.128490 1
nodes 4.33e-02 1.040 1.044 1.048 0.676 0.634 0.690 0.639 0.621 0.662 0.07110 0.57106 14.13 16.179 0.040494 1
grade_nodes 1.50e-02 1.014 1.015 1.016 0.682 0.637 0.686 0.649 0.624 0.655 0.06580 0.54866 13.66 15.650 0.031087 1
age_nodes 1.06e-03 1.001 1.001 1.001 0.678 0.653 0.686 0.642 0.621 0.657 0.03346 0.21312 9.39 5.710 0.035896 1
size_grade 1.75e-03 1.001 1.002 1.002 0.632 0.682 0.686 0.626 0.646 0.655 0.01787 0.29411 6.74 7.728 0.008648 1
age_size 8.73e-05 1.000 1.000 1.000 0.608 0.682 0.686 0.577 0.649 0.657 0.01534 0.29152 6.41 7.652 0.007600 1
grade 2.27e-01 1.168 1.254 1.347 0.571 0.683 0.690 0.500 0.653 0.662 0.01340 0.19036 6.20 4.983 0.008461 1
age_meno -6.04e-03 0.992 0.994 0.996 0.571 0.676 0.686 0.500 0.645 0.657 0.00782 0.08057 4.76 2.337 0.012065 1
age_pgr -5.42e-06 1.000 1.000 1.000 0.571 0.686 0.686 0.500 0.656 0.657 0.00512 0.00745 4.11 0.194 0.000417 1
age_grade -1.65e-03 0.997 0.998 0.999 0.574 0.690 0.690 0.507 0.661 0.662 0.00454 0.11372 3.60 2.960 0.000315 1
meno_grade 1.02e-01 1.045 1.107 1.173 0.571 0.683 0.686 0.500 0.652 0.657 0.00425 0.20428 3.47 5.343 0.004441 1
nodes_hormon -1.38e-02 0.979 0.986 0.994 0.587 0.688 0.686 0.526 0.658 0.655 0.00280 0.45522 3.44 12.150 -0.002853 1
size 3.94e-03 1.002 1.004 1.006 0.611 0.693 0.690 0.618 0.663 0.662 0.00507 0.21050 3.42 5.600 -0.001075 1
meno_pgr 3.19e-04 1.000 1.000 1.001 0.571 0.687 0.686 0.500 0.657 0.657 0.00316 0.05977 3.35 1.558 -0.000429 1
pgr -1.07e-04 1.000 1.000 1.000 0.571 0.689 0.686 0.500 0.659 0.655 0.00257 0.19759 2.64 5.745 -0.004123 1
meno_nodes -2.60e-02 0.955 0.974 0.994 0.640 0.686 0.686 0.595 0.656 0.657 0.00264 -0.06329 2.59 -1.645 0.000631 1
grade_pgr -3.51e-05 1.000 1.000 1.000 0.571 0.669 0.668 0.500 0.627 0.628 0.00241 0.17471 2.55 5.058 0.001252 1
meno_size 2.34e-03 1.000 1.002 1.004 0.604 0.691 0.690 0.578 0.663 0.662 0.00185 0.10227 2.43 2.662 -0.001378 1

Logistic Model Performance

op <- par(no.readonly = TRUE)


cprob <- predict(mlog,dataBrestCancerTrain)

rdata <- cbind(dataBrestCancerTrain$status,cprob)
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5.0)

par(op)

Training Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.394 0.255 0.130969 0.500
RR 1.765 1.739 1.799 2.213 1.000000 1.773
RR_LCI 1.659 1.627 1.676 1.764 0.000000 1.665
RR_UCI 1.879 1.858 1.931 2.777 0.000000 1.888
SEN 0.327 0.470 0.566 0.962 1.000000 0.374
SPE 0.900 0.799 0.731 0.125 0.000683 0.874
BACC 0.613 0.635 0.648 0.543 0.500342 0.624
NetBenefit 0.108 0.165 0.202 0.342 0.435125 0.129
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.957 0.91 1.01 0.0901
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206584

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.666 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7

Results on the validation set using Logistic model

pre <- predict(mlog,dataBrestCancerTest)
rdata <- cbind(dataBrestCancerTest$status,pre)

rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5)

par(op)

Validation Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.541 @:0.431 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.439 0.306 2.31e-01 0.4996
RR 1.792 1.702 1.756 2.678 2.20e+01 1.7318
RR_LCI 1.529 1.428 1.477 1.679 4.75e-02 1.4731
RR_UCI 2.100 2.029 2.088 4.271 1.02e+04 2.0360
SEN 0.395 0.595 0.579 0.950 1.00e+00 0.4482
SPE 0.832 0.638 0.669 0.181 1.29e-02 0.7804
BACC 0.613 0.617 0.624 0.565 5.06e-01 0.6143
NetBenefit 0.060 0.105 0.106 0.210 2.69e-01 0.0717
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.13 1 1.26 0.0428
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.67 0.637 0.699
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Logistic Model Poisson Calibration

riskdata <- cbind(dataBrestCancerTrain$status,predict(mlog,dataBrestCancerTrain,type="prob"),dataBrestCancerTrain$time)
calprob <- CalibrationProbPoissonRisk(riskdata)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Logistic Calibration Parameters")
h0 Gain DeltaTime
0.676 1.31 7.14
timeinterval <- calprob$timeInterval;
gain <- calprob$hazardGain

rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated logistic: training

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6395 0.521 0.480 0.319 0.167426 0.500
RR 1.7654 1.739 1.799 2.213 1.000000 1.759
RR_LCI 1.6587 1.627 1.676 1.764 0.000000 1.643
RR_UCI 1.8790 1.858 1.931 2.777 0.000000 1.882
SEN 0.3267 0.470 0.566 0.962 1.000000 0.507
SPE 0.8996 0.799 0.731 0.125 0.000683 0.774
BACC 0.6132 0.635 0.648 0.543 0.500342 0.641
NetBenefit 0.0789 0.132 0.166 0.288 0.410407 0.147
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.02 0.974 1.08 0.336
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206588

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.667 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.639 0.521
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7
probLog <- predict(mlog,dataBrestCancerTest)
aprob <- adjustProb(probLog,gain)

rdata <- cbind(dataBrestCancerTest$status,aprob)
rrAnalysisTestLogistic <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated validation

pander::pander(t(rrAnalysisTestLogistic$keyPoints),caption="Threshold values")
Threshold values
  @:0.639 @:0.521 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.63882 0.5212 0.5294 0.379 2.90e-01 0.5001
RR 1.79193 1.7024 1.7562 2.678 2.20e+01 1.7026
RR_LCI 1.52914 1.4283 1.4771 1.679 4.75e-02 1.4179
RR_UCI 2.09988 2.0290 2.0880 4.271 1.02e+04 2.0446
SEN 0.39465 0.5953 0.5786 0.950 1.00e+00 0.6421
SPE 0.83204 0.6382 0.6693 0.181 1.29e-02 0.5866
BACC 0.61335 0.6168 0.6239 0.565 5.06e-01 0.6144
NetBenefit 0.00447 0.0374 0.0423 0.132 2.09e-01 0.0466
pander::pander(t(rrAnalysisTestLogistic$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.22 1.08 1.36 0.000902
pander::pander(rrAnalysisTestLogistic$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.637 0.7
pander::pander(t(rrAnalysisTestLogistic$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysisTestLogistic$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.639 0.521
pander::pander(rrAnalysisTestLogistic$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Comparing the COX and Logistic Models on the Independent Data

pander::pander(t(rrCoxTestAnalysis$OAcum95ci))
mean 50% 2.5% 97.5%
0.842 0.842 0.841 0.844
pander::pander(t(rrAnalysisTestLogistic$OAcum95ci))
mean 50% 2.5% 97.5%
0.791 0.791 0.791 0.792
pander::pander(t(rrCoxTestAnalysis$OE95ci))
mean 50% 2.5% 97.5%
1.11 1.11 1.07 1.13
pander::pander(t(rrAnalysisTestLogistic$OE95ci))
mean 50% 2.5% 97.5%
0.989 0.989 0.963 1.02
maxobs <- sum(dataBrestCancerTest$status)

par(mfrow=c(1,2),cex=0.75)

plot(rrCoxTestAnalysis$CumulativeOvs[,1:2],type="l",lty=1,
     main="Cumulative Probability",
     xlab="Observed",
     ylab="Cumulative Probability",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$CumulativeOvs[,1:2],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)


plot(rrCoxTestAnalysis$CumulativeOvs$Observed,
     rrCoxTestAnalysis$CumulativeOvs$Cumulative-
       rrCoxTestAnalysis$CumulativeOvs$Observed,
     main="Cumulative Risk Difference",
     xlab="Observed",
     ylab="Cumulative Risk - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$CumulativeOvs$Observed,
     rrAnalysisTestLogistic$CumulativeOvs$Cumulative-
       rrAnalysisTestLogistic$CumulativeOvs$Observed,
     lty=2,
     col="red")
legend("topleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

plot(rrCoxTestAnalysis$OEData[,2:3],type="l",lty=1,
     main="Expected over Time",
     xlab="Observed",
     ylab="Expected",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$OEData[,2:3],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)

plot(rrCoxTestAnalysis$OEData$Observed,
     rrCoxTestAnalysis$OEData$Expected-
       rrCoxTestAnalysis$OEData$Observed,
     main="Expected vs Observed Difference",
     xlab="Observed",
     ylab="Cumulative - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$OEData$Observed,
     rrAnalysisTestLogistic$OEData$Expected-
       rrAnalysisTestLogistic$OEData$Observed,
     lty=2,col="red")

legend("bottomleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

par(op)